/*******************************************************************************
																				
	DESCRIPTION:  	This do file prepares the predictions for the duration 
					dependence analysis (i.e., the individual-level regressions
					of the predictions from the 0M, 6M and 12M models on the 0M
					sample), which is performed in R.
	
*******************************************************************************/

* Start from a clean session (data, frames, stored results)
clear all
pause on
set seed 2110

* Prefix for the files written by this do file
global id_code 119_1

* Model specification and first / last year of the sample
global model Full
global y0 2006
global y1 2006

* Year tag used in file names: the year itself when the sample covers a
* single year, "Pooled_<first>_<last>" when it spans several years
if $y0 == $y1 {
	global y $y0
}
else {
	global y "Pooled_${y0}_${y1}"
}
		

*******************************************************************************
 * Read in the main dataset with the ensemble predictions
 * (the global data must already hold the data directory; it is not set here)
*******************************************************************************
use "${data}/003_MainWithEnsemblePred_${model}_${y}.dta", clear


*******************************************************************************
 * Regression line of the realised outcome on its prediction, one per
 * unemployment duration (0, 6 and 12 months)
*******************************************************************************

foreach horizon in 6 {

	foreach dur in 0 6 12 {

		* Realised outcome and the matching prediction for this duration
		local outcome emplAft`horizon'M_`dur'M_In
		local pred    p_`outcome'

		* Restrict to spells whose date `dur' months (30 days each) after
		* startU falls within the sample years, then fit the line.
		* Only intercept and slope are stored (locals b0_<dur>M, b1_<dur>M);
		* they are used further down to rescale the predictions.
		regress `outcome' `pred' ///
			if inrange(year(startU + `dur'*30), $y0 , $y1 )

		local b0_`dur'M = _b[_cons]
		local b1_`dur'M = _b[`pred']
	}
}



*******************************************************************************
 * Load Predictions made with other models
*******************************************************************************

* Merge with predictions from models trained on individuals who are 
* unemployed for a different amount of time than those for whom the
* predictions are made.
* assert(2 3): every observation in memory must find a match in the using
* file (the merge aborts otherwise); keep(3): only matched spells are kept.
merge 1:1 LopNr_PersonNr InLnr ///
	using "${data}/003_MainWithEnsemblePred_Full_${y}_xMonthPred_yMonthModel.dta" ///
	, keepusing(p_emplAft6M_0M_In_6M_Mod p_emplAft6M_0M_In_12M_Mod) ///
	 assert(2 3) keep(3) nogen


*******************************************************************************
 * Shrink predictions using the regression lines calculated above:
*******************************************************************************

* Get rid of observations with missing data:
keep if !missing(p_emplAft6M_0M_In, p_emplAft6M_0M_In_6M_Mod, ///
	p_emplAft6M_0M_In_12M_Mod)
	
* Generate a single unique spell identifier.
* Stored as long rather than the default float: a float represents integers
* exactly only up to 16,777,216, so with more observations than that distinct
* rows would receive the same key and the later merge on key would break.
gen long key = _n

* Number of spells kept (not used again in this file)
global total = _N

* Build the rescaled ("shrunk") predictions in a separate copy of the data so
* that the default frame stays untouched
frame copy default data_ShrunkPred
cwf data_ShrunkPred

* Prediction variable produced by the 0-, 6- and 12-month model
local raw0  p_emplAft6M_0M_In
local raw6  p_emplAft6M_0M_In_6M_Mod
local raw12 p_emplAft6M_0M_In_12M_Mod

* Apply the fitted line of the matching duration: intercept + slope * prediction
foreach mm in 0 6 12 {
	gen p_emplAft6M_0M_In`mm' = `b0_`mm'M' + `b1_`mm'M' * `raw`mm''
}

* The unsuffixed variable has to go before its name can serve as reshape stub
drop p_emplAft6M_0M_In

* One row per spell and model; ModelMonth takes the values 0, 6 and 12
reshape long p_emplAft6M_0M_In, i(LopNr_PersonNr InLnr) j(ModelMonth)

* Only the spell key, the model indicator and the prediction are exported
keep key ModelMonth p_emplAft6M_0M_In

save "${data}/${id_code}_DataForR_DurationDependenceBetas_Full_${y}_ShrunkPred.dta", replace

* Back to the original data
cwf default

* Export the predictions as they come out of the models (no rescaling),
* again working on a copy of the default frame
frame copy default data
cwf data

* Give the three predictions a common stub with the model month as suffix
rename (p_emplAft6M_0M_In   p_emplAft6M_0M_In_6M_Mod  p_emplAft6M_0M_In_12M_Mod) ///
       (p_emplAft6M_0M_In0  p_emplAft6M_0M_In6        p_emplAft6M_0M_In12)

* One row per spell and model; ModelMonth takes the values 0, 6 and 12
reshape long p_emplAft6M_0M_In, i(LopNr_PersonNr InLnr) j(ModelMonth)

* Floor the predictions at 0.001 so that none is zero (or below 0.001)
replace p_emplAft6M_0M_In = 0.001 if p_emplAft6M_0M_In <= 0.001

* Only the spell key, the model indicator and the prediction are exported
keep key ModelMonth p_emplAft6M_0M_In

save "${data}/${id_code}_DataForR_DurationDependenceBetas_Full_${y}.dta", replace
cwf default

* Store the wide default dataset (including key) so that the regression
* results can be merged back onto it later
save "${data}/${id_code}_DataForR_DurationDependenceBetas_Full_${y}_MergeOn.dta", replace
		